Load packages

require(Seurat)
require(data.table)
require(dplyr)
require(ggrastr)
require(stringr)
require(Biostrings)
require(useful)
require(pals)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl_init' failed, running with rgl.useNULL = TRUE
require(ggplot2)

# require(devtools)
# install_github("YosefLab/VISION")
require("VISION")
## Warning: replacing previous import 'Matrix::cov2cor' by 'stats::cov2cor'
## when loading 'VISION'
## Warning: replacing previous import 'Matrix::toeplitz' by 'stats::toeplitz'
## when loading 'VISION'
## Warning: replacing previous import 'Matrix::update' by 'stats::update' when
## loading 'VISION'
## Warning: replacing previous import 'Matrix::tail' by 'utils::tail' when
## loading 'VISION'

Load anchored Seurat object (Seurat V3)

Plot UMAP

# Draw the UMAP embedding of BAL_list, grouping cells by the "annot.new"
# metadata column.
DimPlot(
  object = BAL_list,
  reduction = "umap",
  group.by = "annot.new",
  pt.size = 0.1
)

Microclustering using VISION

# Pool cells into VISION micro-clusters, one patient at a time.
#
# Reads:   BAL_list (Seurat object; the "pat" metadata column is used to
#          split the cells by patient).
# Creates: patients                   - unique values of the "pat" column
#          <patient>_pools            - result of applyMicroClustering()
#          <patient>_pooledExpression - result of poolMatrixCols(), with
#                                       column names prefixed "<patient>_"
#          pools.df                   - one row per pooled cell with columns
#                                       `cluster` ("<patient>_<pool name>")
#                                       and `cells` (cell name)

# Use the non-anchored data
DefaultAssay(BAL_list) <- "RNA"

# Initialize the microcluster data frame. The first row is only a
# placeholder; it is removed again after the loop has finished.
pools.df <- data.frame(cluster = 1, cells = 1)

patients <- unique(BAL_list@meta.data$pat)

# One cell -> pool table per patient. They are collected in a preallocated
# list and bound once after the loop instead of growing pools.df with
# rbind() for every single pool.
pool_tables <- vector("list", length(patients))

# Run the microclustering for each patient separately
for (i in seq_along(patients)) {
  pat <- patients[i]

  # which() drops NA comparisons, so cells without a patient label do not
  # produce NA row names in the cell selection.
  pat_cells <- row.names(BAL_list@meta.data)[
    which(BAL_list@meta.data$pat == pat)
  ]
  BAL_list_subset <- subset(BAL_list, cells = pat_cells)

  # Back-transform the values of the RNA assay's `data` slot.
  # NOTE(review): 2^x - 1 inverts a log2(x + 1) transform. Seurat's
  # NormalizeData() with "LogNormalize" uses the natural log, whose inverse
  # would be expm1(x) - confirm how this `data` slot was produced.
  scaled.df <- BAL_list_subset@assays$RNA@data
  scaled.df <- (2^(scaled.df)) - 1

  # Define pools; the cellsPerPartition argument is set to 5% of this
  # patient's cell count.
  pools <- applyMicroClustering(
    as.matrix(scaled.df),
    cellsPerPartition = round(ncol(scaled.df) * 0.05),
    filterThreshold = 3
  )
  assign(paste0(pat, "_pools"), pools)

  # Create pooled versions of expression matrix
  pooledExpression <- poolMatrixCols(scaled.df, pools)
  colnames(pooledExpression) <- paste(
    pat, colnames(pooledExpression), sep = "_"
  )
  assign(paste0(pat, "_pooledExpression"), pooledExpression)

  # Create overview of pooled cells: repeat every pool name once per member
  # cell and pair it with the flattened vector of cell names.
  if (length(pools) > 0L) {
    pool_tables[[i]] <- data.frame(
      cluster = paste(pat, rep(names(pools), lengths(pools)), sep = "_"),
      cells = as.character(unlist(pools, use.names = FALSE)),
      stringsAsFactors = FALSE
    )
  }
}

# Append all per-patient tables below the placeholder row in one step
# (NULL entries from patients without pools are ignored by rbind()).
pools.df <- rbind(pools.df, do.call(rbind, pool_tables))
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 12956
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1142
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 2685 cells into 21 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 14397
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1602
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 4555 cells into 22 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 14668
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1340
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 4420 cells into 22 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 17327
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1621
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 12085 cells into 17 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 17597
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1574
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 16190 cells into 22 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 14868
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1457
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 3060 cells into 22 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 12964
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1223
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 2640 cells into 20 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 14811
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1681
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 4343 cells into 20 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 12881
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1226
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 3399 cells into 24 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 14174
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1399
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 1524 cells into 19 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 14598
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1652
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 2566 cells into 21 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 11604
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1039
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 1414 cells into 20 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 12198
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1010
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 1788 cells into 21 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 11849
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1258
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 1108 cells into 21 pools
##     Computing a latent space for microclustering using PCA...
##     Determining lateng space genes...
##     Applying Threshold filter...removing genes detected in less than 3 cells
##       Genes Retained: 12120
##     Applying Fano filter...removing genes with Fano < 2.0 MAD in each of 30 bins
##       Genes Retained: 1302
##     Performing PCA...
##     Performing initial coarse-clustering...
##     Further partitioning coarse clusters...
##     Micro-pooling completed reducing 1344 cells into 21 pools
# Drop the placeholder first row that pools.df was initialised with.
pools.df <- pools.df[-1L, , drop = FALSE]

Check Microclusters on UMAP

# Overlay the micro-cluster assignment on the UMAP, one plot per patient.

# Key pools.df by cell name so it can be joined on row names below.
row.names(pools.df) <- pools.df$cells
pools.df$cells <- NULL

# UMAP coordinates + cell metadata + micro-cluster label.
# merge(..., by = 0) joins on row names and returns them in a "Row.names"
# column, which is moved back to the row names before the second join.
tmp <- as.data.frame(BAL_list@reductions$umap@cell.embeddings)
tmp <- merge(tmp, BAL_list@meta.data, by = 0)
row.names(tmp) <- tmp$Row.names
tmp$Row.names <- NULL
tmp <- merge(tmp, pools.df, by = 0)

# The theme is identical for every patient, so build it once.
# legend.position must be "none" to hide the legend; the string "NULL" is
# not one of the values documented for ggplot2::theme().
microcluster_theme <- theme(
  legend.position = "none",
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  axis.line.x = element_line(color = "black", size = .5),
  axis.title.x = element_blank(),
  axis.line.y = element_line(color = "black", size = .5)
)

for (i in seq_along(patients)) {
  # which() keeps rows with a missing patient label out of the plot data.
  patient_cells <- tmp[which(tmp$pat == patients[i]), ]
  print(
    ggplot(patient_cells, aes(x = UMAP_1, y = UMAP_2, color = cluster)) +
      geom_point(size = .1) +
      ggtitle(patients[i]) +
      microcluster_theme
  )
}